{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c261e5f4-17a8-40da-beb9-599f1717e0fe",
   "metadata": {},
   "source": [
    "### 1. 安装HuggingFace 并下载模型到本地"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "02785614-9268-41c8-85a5-d579490edbbf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: sagemaker in /opt/conda/lib/python3.7/site-packages (2.183.0)\n",
      "Requirement already satisfied: attrs<24,>=23.1.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (23.1.0)\n",
      "Requirement already satisfied: boto3<2.0,>=1.26.131 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.28.41)\n",
      "Requirement already satisfied: cloudpickle==2.2.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (2.2.1)\n",
      "Requirement already satisfied: google-pasta in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.2.0)\n",
      "Requirement already satisfied: numpy<2.0,>=1.9.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.21.6)\n",
      "Requirement already satisfied: protobuf<5.0,>=3.12 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.20.3)\n",
      "Requirement already satisfied: smdebug-rulesconfig==1.0.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.0.1)\n",
      "Requirement already satisfied: importlib-metadata<7.0,>=1.4.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (4.13.0)\n",
      "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (23.1)\n",
      "Requirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.3.5)\n",
      "Requirement already satisfied: pathos in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.3.0)\n",
      "Requirement already satisfied: schema in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.7.5)\n",
      "Requirement already satisfied: PyYAML~=6.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (6.0)\n",
      "Requirement already satisfied: jsonschema in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.2.0)\n",
      "Requirement already satisfied: platformdirs in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.5.3)\n",
      "Requirement already satisfied: tblib==1.7.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.7.0)\n",
      "Requirement already satisfied: botocore<1.32.0,>=1.31.41 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (1.31.41)\n",
      "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (1.0.1)\n",
      "Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (0.6.1)\n",
      "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata<7.0,>=1.4.0->sagemaker) (2.2.0)\n",
      "Requirement already satisfied: typing-extensions>=3.6.4 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata<7.0,>=1.4.0->sagemaker) (4.6.3)\n",
      "Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from google-pasta->sagemaker) (1.16.0)\n",
      "Requirement already satisfied: pyrsistent>=0.14.0 in /opt/conda/lib/python3.7/site-packages (from jsonschema->sagemaker) (0.15.7)\n",
      "Requirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from jsonschema->sagemaker) (65.5.1)\n",
      "Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker) (2019.3)\n",
      "Requirement already satisfied: ppft>=1.7.6.6 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (1.7.6.6)\n",
      "Requirement already satisfied: dill>=0.3.6 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.3.6)\n",
      "Requirement already satisfied: pox>=0.3.2 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.3.2)\n",
      "Requirement already satisfied: multiprocess>=0.70.14 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.70.14)\n",
      "Requirement already satisfied: contextlib2>=0.5.5 in /opt/conda/lib/python3.7/site-packages (from schema->sagemaker) (0.6.0.post1)\n",
      "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.7/site-packages (from botocore<1.32.0,>=1.31.41->boto3<2.0,>=1.26.131->sagemaker) (1.26.16)\n",
      "\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n",
      "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!pip install huggingface-hub -Uqq \n",
    "!pip install -U sagemaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0ba24701-47db-4107-9a6c-1667038d0054",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!rm -rf ./LLM_qwen_int4_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9e6bd7ee-16a3-4f5a-8857-8bbba83eb9e7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import snapshot_download\n",
    "from pathlib import Path\n",
    "local_model_path = Path(\"./LLM_qwen_14b_int4_model\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c3469632-4174-4df4-a7d1-ef167561c626",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "local_model_path.mkdir(exist_ok=True)\n",
    "# model_name = \"Qwen/Qwen-7B-Chat-Int4\"\n",
    "# commit_hash = \"955bbfa8c8f0c592b7713f1186c2c9ab477ef862\"\n",
    "\n",
    "\n",
    "model_name = \"Qwen/Qwen-14B-Chat-Int4\"\n",
    "commit_hash = \"0f5e18f5f18b3ced68be965099f091189964ed85\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "94e8abc5-a58e-40e2-b1e6-fbf48307c716",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f3233124041d40a384f04542b135419d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Fetching 31 files', max=31.0, style=ProgressStyle(descrip…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3f806509196f423f87cead318a9fbf06",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)ter_showcase_001.jpg', max=138442.0, style…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fa14b2b085224e618780f3a9db389fc6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)091189964ed85/NOTICE', max=2703.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d61c46e0749241c7832c16a938c57c33",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)91189964ed85/LICENSE', max=6896.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f67bb4f70b9747f288c5ce065fa24c44",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)/assets/cli_demo.gif', max=249464.0, style…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "db6856aaaf1b40729427cc362645cd92",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)4ed85/.gitattributes', max=1702.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cd38b53ca82b4c99865a796ea3acaaff",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading hfagent_chat_1.png', max=1708738.0, style=Pro…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b5547939512c4915840da8628f0f317f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading hfagent_chat_2.png', max=1927640.0, style=Pro…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f3f101593ec543909a3eb3e3df6c772c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)189964ed85/README.md', max=27906.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9439e504f16e4d4d84186eb7719671a4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)ed85/assets/logo.jpg', max=65062.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8e3da6f43cb54a2198a928dff369fef8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)act_showcase_001.png', max=309444.0, style…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ee2bf7efbcc64763bc572b5d442ec086",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading hfagent_run.png', max=2770957.0, style=Progre…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ac5f16f0352a4db199f11f460f957ffd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)85/assets/wechat.png', max=104177.0, style…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e1b65847901f49e8a19432607c6d1003",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)act_showcase_002.png', max=629511.0, style…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "295dff3ee45d4952b157ed7fa5487e8e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)onfiguration_qwen.py', max=2263.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8db86a92f3d84363bc1d585614261776",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)9964ed85/config.json', max=1199.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5c3190fefca8445b990b5642cd0e6707",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)neration_config.json', max=238.0, style=Pr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1ea607d331c4416d8c02f9e86fb2f3b7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)utogptq_cuda_256.cpp', max=8404.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9ee3603b52ef48e4be7382ffe8c9570c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)q_cuda_kernel_256.cu', max=51991.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "30b54269b6344ebd8e1040973dfc2726",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)rnels/cpp_kernels.py', max=1940.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "54c4ea681f4c4e2b80c234d5696b33b9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)of-00005.safetensors', max=2047828192.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e8ca08818b8d407587a795caecc6a503",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)of-00005.safetensors', max=2024441368.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "244669e5304641e08b68db728a696167",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)fetensors.index.json', max=82156.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3e15db2792774db4939120870bdf4138",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)of-00005.safetensors', max=1557135488.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c7e32378c88b490a8730ab0d20b968f8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)of-00005.safetensors', max=2041419864.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "acd9354317104099b19bb60959120589",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)of-00005.safetensors', max=2004933264.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "66f1f90aed7a471d9c6dfb832cc258bb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)d85/modeling_qwen.py', max=56859.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8db13932ae264ec2995dbe9b45fe2316",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)quantize_config.json', max=211.0, style=Pr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "380a53f487314926b862675d9d736cf3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)64ed85/qwen.tiktoken', max=2561218.0, styl…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b8dda7c72908422995e4ec31a08888e7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)_generation_utils.py', max=14604.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3b8951302c7c46e5acf95052964311fc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)okenizer_config.json', max=193.0, style=Pr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0249af8ec3964eb795a997589bceacd0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)tokenization_qwen.py', max=8443.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "snapshot_download(repo_id=model_name, revision=commit_hash, cache_dir=local_model_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d666c79-b039-4258-ac3b-46b19e63c3b8",
   "metadata": {},
   "source": [
    "### 2. 把模型拷贝到S3为后续部署做准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "e9431deb-6359-442d-847b-1563f8dd3854",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker import image_uris\n",
    "import boto3\n",
    "import os\n",
    "import time\n",
    "import json\n",
    "\n",
    "role = sagemaker.get_execution_role()  # execution role for the endpoint\n",
    "sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs\n",
    "bucket = sess.default_bucket()  # bucket to house artifacts\n",
    "\n",
    "region = sess._region_name\n",
    "account_id = sess.account_id()\n",
    "\n",
    "s3_client = boto3.client(\"s3\")\n",
    "sm_client = boto3.client(\"sagemaker\")\n",
    "smr_client = boto3.client(\"sagemaker-runtime\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "40dd8f16-ae7c-48bf-8e52-1a15425fa74d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s3_code_prefix: LLM-RAG/workshop/LLM_qwen_14b_int4_deploy_code\n",
      "model_snapshot_path: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85\n"
     ]
    }
   ],
   "source": [
    "s3_model_prefix = \"LLM-RAG/workshop/LLM_qwen_14b_int4_model\"  # folder where model checkpoint will go\n",
    "model_snapshot_path = list(local_model_path.glob(\"**/snapshots/*\"))[0]\n",
    "s3_code_prefix = \"LLM-RAG/workshop/LLM_qwen_14b_int4_deploy_code\"\n",
    "print(f\"s3_code_prefix: {s3_code_prefix}\")\n",
    "print(f\"model_snapshot_path: {model_snapshot_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "067292c9-c066-4649-a61f-b460a24da584",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/.gitattributes to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/.gitattributes\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/NOTICE to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/NOTICE\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/LICENSE to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/LICENSE\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/README.md to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/README.md\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/wechat.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/wechat.png\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/cli_demo.gif to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/cli_demo.gif\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/code_interpreter_showcase_001.jpg to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/code_interpreter_showcase_001.jpg\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/react_showcase_002.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/react_showcase_002.png\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/logo.jpg to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/logo.jpg\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/kernels/cpp_kernels.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/kernels/cpp_kernels.py\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/config.json\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/react_showcase_001.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/react_showcase_001.png\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/hfagent_run.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/hfagent_run.png\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/hfagent_chat_1.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/hfagent_chat_1.png\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/assets/hfagent_chat_2.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/assets/hfagent_chat_2.png\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/kernels/cache_autogptq_cuda_256.cpp to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/kernels/cache_autogptq_cuda_256.cpp\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/kernels/cache_autogptq_cuda_kernel_256.cu to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/kernels/cache_autogptq_cuda_kernel_256.cu\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/generation_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/generation_config.json\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/configuration_qwen.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/configuration_qwen.py\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/model.safetensors.index.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/model.safetensors.index.json\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/model-00004-of-00005.safetensors to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/model-00004-of-00005.safetensors\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/modeling_qwen.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/modeling_qwen.py\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/quantize_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/quantize_config.json\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/qwen_generation_utils.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/qwen_generation_utils.py\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/qwen.tiktoken to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/qwen.tiktoken\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/tokenization_qwen.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/tokenization_qwen.py\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/tokenizer_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/tokenizer_config.json\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/model-00003-of-00005.safetensors to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/model-00003-of-00005.safetensors\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/model-00002-of-00005.safetensors to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/model-00002-of-00005.safetensors\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/model-00005-of-00005.safetensors to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/model-00005-of-00005.safetensors\n",
      "upload: LLM_qwen_14b_int4_model/models--Qwen--Qwen-14B-Chat-Int4/snapshots/0f5e18f5f18b3ced68be965099f091189964ed85/model-00001-of-00005.safetensors to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/model-00001-of-00005.safetensors\n"
     ]
    }
   ],
   "source": [
    "!aws s3 cp --recursive {model_snapshot_path} s3://{bucket}/{s3_model_prefix}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "696b70c3-90f1-4175-95bf-568bafbcd383",
   "metadata": {},
   "source": [
    "### 3. 模型部署准备（entrypoint脚本，容器镜像，服务配置）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6f7c4277-4480-42c6-aee6-1fbcca94eb82",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\n"
     ]
    }
   ],
   "source": [
    "#中国区需要替换为下面的image_uri\n",
    "# inference_image_uri = (\n",
    "#     f\"727897471807.dkr.ecr.{region}.amazonaws.com.cn/djl-inference:0.23.0-deepspeed0.9.5-cu118\"\n",
    "# )\n",
    "\n",
    "\n",
    "inference_image_uri = image_uris.retrieve(\n",
    "    framework=\"djl-deepspeed\",\n",
    "    region=sess.boto_session.region_name,\n",
    "    version=\"0.23.0\"\n",
    ")\n",
    "print(f\"Image going to be used is ---- > {inference_image_uri}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8d771bdb-11d2-45d2-9bef-face29221838",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!mkdir -p LLM_qwen_14b_int4_deploy_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "e5348ecb-43df-4094-97d8-a6723004862a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_qwen_14b_int4_deploy_code/model.py\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_qwen_14b_int4_deploy_code/model.py\n",
    "from djl_python import Input, Output\n",
    "import torch\n",
    "import logging\n",
    "import math\n",
    "import os\n",
    "\n",
    "from transformers import AutoTokenizer,AutoModelForCausalLM\n",
    "from transformers.generation import GenerationConfig\n",
    "# from auto_gptq import AutoGPTQForCausalLM\n",
    "\n",
    "\n",
    "STOP_flag = \"[DONE]\"\n",
    "\n",
    "\n",
    "def load_model(properties):\n",
    "    tensor_parallel = properties[\"tensor_parallel_degree\"]\n",
    "    model_location = properties['model_dir']\n",
    "    if \"model_id\" in properties:\n",
    "        model_location = properties['model_id']\n",
    "    logging.info(f\"Loading model in {model_location}\")\n",
    "    \n",
    "    tokenizer = AutoTokenizer.from_pretrained(model_location, trust_remote_code=True)\n",
    "    model = AutoModelForCausalLM.from_pretrained(model_location, device_map=\"auto\", trust_remote_code=True).eval()\n",
    "    model.generation_config = GenerationConfig.from_pretrained(model_location, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参\n",
    "\n",
    "    return model, tokenizer, model.generation_config\n",
    "\n",
    "\n",
    "model = None\n",
    "tokenizer = None\n",
    "generator = None\n",
    "config = None\n",
    "\n",
    "def stream_items(prompt, history, max_length, top_p, temperature):\n",
    "    global model, tokenizer, config\n",
    "    size = 0\n",
    "    response = \"\"\n",
    "    config.max_new_tokens = max_length\n",
    "    config.top_p = top_p\n",
    "    \n",
    "    ##传入temperature会报错\n",
    "    ##model.generation_config.temperature = temperature \n",
    "    res_generator = model.chat_stream(tokenizer, prompt, history=history,generation_config=config)\n",
    "    for response in res_generator:\n",
    "        this_response = response[size:]\n",
    "        size = len(response)\n",
    "        stream_buffer = { \"outputs\":this_response,\"finished\": False}\n",
    "        yield stream_buffer\n",
    "    ## stop\n",
    "    # yield {\"query\": prompt, \"outputs\": STOP_flag, \"response\": response, \"history\": [], \"finished\": True}\n",
    "\n",
    "\n",
    "def handle(inputs: Input):\n",
    "    global model, tokenizer,config\n",
    "    if not model:\n",
    "        model, tokenizer,config = load_model(inputs.get_properties())\n",
    "\n",
    "    if inputs.is_empty():\n",
    "        return None\n",
    "    data = inputs.get_as_json()\n",
    "    \n",
    "    input_sentences = data[\"inputs\"]\n",
    "    params = data[\"parameters\"]\n",
    "    history = data.get(\"history\",[])\n",
    "    stream = data.get('stream')\n",
    "    print(f'input prompt:{input_sentences}')   \n",
    "    print(f'generation config:{config}')\n",
    "    outputs = Output()\n",
    "    if stream:\n",
    "        outputs.add_property(\"content-type\", \"application/jsonlines\")\n",
    "        outputs.add_stream_content(stream_items(input_sentences,history=history,**params))\n",
    "    else:\n",
    "        config.max_new_tokens = params.get('max_length',1024)\n",
    "        config.top_p = params.get('top_p',1)\n",
    "        response, history = model.chat(tokenizer, input_sentences, history=history,generation_config=config)\n",
    "        result = {\"outputs\": response, \"history\" : history}\n",
    "        outputs.add_as_json(result)\n",
    "        \n",
    "    return outputs"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a06d1e60-3914-4059-a08f-05ac26761165",
   "metadata": {},
   "source": [
    "#### Note: option.s3url 需要按照自己的账号进行修改"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "8996fe44-8e70-468b-abc1-38187cb33f4f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_qwen_14b_int4_deploy_code/serving.properties\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_qwen_14b_int4_deploy_code/serving.properties\n",
    "engine=Python\n",
    "option.tensor_parallel_degree=1\n",
    "option.enable_streaming=True\n",
    "option.predict_timeout=240\n",
    "option.s3url = s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_model/"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "feef22a2-27b9-4018-a46b-6a99b532512f",
   "metadata": {},
   "source": [
    "#### 注意: 必须把transformers升级到4.27.1以上，否则会出现 [Issue344](https://github.com/THUDM/ChatGLM-6B/issues/344)\n",
    "\n",
    "如果是中国区建议添加国内的pip镜像,如下代码所示\n",
    "```\n",
    "%%writefile LLM_chatglm_deploy_code/requirements.txt\n",
    "-i https://pypi.tuna.tsinghua.edu.cn/simple\n",
    "transformers==4.28.1\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "7b7e76c6-6dbc-47fc-9f47-4765c526ab76",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_qwen_14b_int4_deploy_code/requirements.txt\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_qwen_14b_int4_deploy_code/requirements.txt\n",
    "transformers==4.32.0\n",
    "accelerate\n",
    "tiktoken\n",
    "einops\n",
    "scipy\n",
    "transformers_stream_generator==0.0.4\n",
    "peft\n",
    "deepspeed\n",
    "auto-gptq\n",
    "optimum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "199907e8-dde4-43b5-a6f3-82f46a6bf6f3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# !pip install auto-gptq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "0ae6734a-aacd-410d-818d-0a962697c3c4",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM_qwen_14b_int4_deploy_code/\n",
      "LLM_qwen_14b_int4_deploy_code/model.py\n",
      "LLM_qwen_14b_int4_deploy_code/requirements.txt\n",
      "LLM_qwen_14b_int4_deploy_code/serving.properties\n"
     ]
    }
   ],
   "source": [
    "!rm model.tar.gz\n",
    "!cd LLM_qwen_14b_int4_deploy_code && rm -rf \".ipynb_checkpoints\"\n",
    "!tar czvf model.tar.gz LLM_qwen_14b_int4_deploy_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "0f77dc76-6d8c-4665-ba88-f03e887c136c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_14b_int4_deploy_code/model.tar.gz\n"
     ]
    }
   ],
   "source": [
    "s3_code_artifact = sess.upload_data(\"model.tar.gz\", bucket, s3_code_prefix)\n",
    "print(f\"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a5853daa-b8a3-4485-8c0a-64bf83e93a18",
   "metadata": {},
   "source": [
    "### 4. 创建模型 & 创建endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "ef974ca1-9638-45a8-9145-ea9d03b2b072",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "qwen-14B-int4-2023-09-26-01-30-53-032\n",
      "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\n",
      "Created Model: arn:aws:sagemaker:us-east-2:946277762357:model/qwen-14b-int4-2023-09-26-01-30-53-032\n"
     ]
    }
   ],
   "source": [
    "from sagemaker.utils import name_from_base\n",
    "import boto3\n",
    "\n",
    "model_name = name_from_base(f\"qwen-14B-int4\") #Note: Need to specify model_name\n",
    "print(model_name)\n",
    "print(f\"Image going to be used is ---- > {inference_image_uri}\")\n",
    "\n",
    "create_model_response = sm_client.create_model(\n",
    "    ModelName=model_name,\n",
    "    ExecutionRoleArn=role,\n",
    "    PrimaryContainer={\n",
    "        \"Image\": inference_image_uri,\n",
    "        \"ModelDataUrl\": s3_code_artifact\n",
    "    },\n",
    "    \n",
    ")\n",
    "model_arn = create_model_response[\"ModelArn\"]\n",
    "\n",
    "print(f\"Created Model: {model_arn}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "233bb3a4-d737-41ad-8fcc-7082c6278e8c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-2:946277762357:endpoint-config/qwen-14b-int4-2023-09-26-01-30-53-032-config',\n",
       " 'ResponseMetadata': {'RequestId': '1d97e0c2-ae8b-4e17-ac4b-b0f2166d16f3',\n",
       "  'HTTPStatusCode': 200,\n",
       "  'HTTPHeaders': {'x-amzn-requestid': '1d97e0c2-ae8b-4e17-ac4b-b0f2166d16f3',\n",
       "   'content-type': 'application/x-amz-json-1.1',\n",
       "   'content-length': '125',\n",
       "   'date': 'Tue, 26 Sep 2023 01:30:54 GMT'},\n",
       "  'RetryAttempts': 0}}"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "endpoint_config_name = f\"{model_name}-config\"\n",
    "endpoint_name = f\"{model_name}-endpoint\"\n",
    "\n",
    "#Note: ml.g4dn.2xlarge 也可以选择\n",
    "endpoint_config_response = sm_client.create_endpoint_config(\n",
    "    EndpointConfigName=endpoint_config_name,\n",
    "    ProductionVariants=[\n",
    "        {\n",
    "            \"VariantName\": \"variant1\",\n",
    "            \"ModelName\": model_name,\n",
    "            \"InstanceType\": \"ml.g5.2xlarge\",\n",
    "            \"InitialInstanceCount\": 1,\n",
    "            # \"VolumeSizeInGB\" : 400,\n",
    "            # \"ModelDataDownloadTimeoutInSeconds\": 2400,\n",
    "            \"ContainerStartupHealthCheckTimeoutInSeconds\": 10*60,\n",
    "        },\n",
    "    ],\n",
    ")\n",
    "endpoint_config_response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "734a39b0-473e-4421-94c8-74d2b4105038",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created Endpoint: arn:aws:sagemaker:us-east-2:946277762357:endpoint/qwen-14b-int4-2023-09-26-01-30-53-032-endpoint\n"
     ]
    }
   ],
   "source": [
    "create_endpoint_response = sm_client.create_endpoint(\n",
    "    EndpointName=f\"{endpoint_name}\", EndpointConfigName=endpoint_config_name\n",
    ")\n",
    "print(f\"Created Endpoint: {create_endpoint_response['EndpointArn']}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1262e826-a810-401d-a5a9-f62febb24e5f",
   "metadata": {},
   "source": [
    "#### 持续检测模型部署进度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "08969928-6b9e-4d9c-a033-a31f5f77bdfb",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: InService\n",
      "Arn: arn:aws:sagemaker:us-east-2:946277762357:endpoint/qwen-14b-int4-2023-09-26-01-30-53-032-endpoint\n",
      "Status: InService\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
    "status = resp[\"EndpointStatus\"]\n",
    "print(\"Status: \" + status)\n",
    "\n",
    "while status == \"Creating\":\n",
    "    time.sleep(60)\n",
    "    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
    "    status = resp[\"EndpointStatus\"]\n",
    "    print(\"Status: \" + status)\n",
    "\n",
    "print(\"Arn: \" + resp[\"EndpointArn\"])\n",
    "print(\"Status: \" + status)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d985b427-3959-46f7-9a50-5a2b45e2d513",
   "metadata": {},
   "source": [
    "### 5. 模型测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "e56bfdaa-3469-4784-aa8a-e32177cde3f2",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 7.03 ms, sys: 1.19 ms, total: 8.22 ms\n",
      "Wall time: 15.6 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "import json\n",
    "import boto3\n",
    "\n",
    "smr_client = boto3.client(\"sagemaker-runtime\")\n",
    "\n",
    "parameters = {\n",
    "  \"max_length\": 1024,\n",
    "  \"temperature\": 0.1,\n",
    "  \"top_p\":0.8\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "ae5983aa-0c91-4c78-a63f-7192a39a8cfb",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import io\n",
    "\n",
    "\n",
    "class StreamScanner:\n",
    "    \"\"\"\n",
    "    A helper class for parsing the InvokeEndpointWithResponseStream event stream. \n",
    "    \n",
    "    The output of the model will be in the following format:\n",
    "    ```\n",
    "    b'{\"outputs\": [\" a\"]}\\n'\n",
    "    b'{\"outputs\": [\" challenging\"]}\\n'\n",
    "    b'{\"outputs\": [\" problem\"]}\\n'\n",
    "    ...\n",
    "    ```\n",
    "    \n",
    "    While usually each PayloadPart event from the event stream will contain a byte array \n",
    "    with a full json, this is not guaranteed and some of the json objects may be split across\n",
    "    PayloadPart events. For example:\n",
    "    ```\n",
    "    {'PayloadPart': {'Bytes': b'{\"outputs\": '}}\n",
    "    {'PayloadPart': {'Bytes': b'[\" problem\"]}\\n'}}\n",
    "    ```\n",
    "    \n",
    "    This class accounts for this by concatenating bytes written via the 'write' function\n",
    "    and then exposing a method which will return lines (ending with a '\\n' character) within\n",
    "    the buffer via the 'readlines' function. It maintains the position of the last read \n",
    "    position to ensure that previous bytes are not exposed again. \n",
    "    \"\"\"\n",
    "    \n",
    "    def __init__(self):\n",
    "        self.buff = io.BytesIO()\n",
    "        self.read_pos = 0\n",
    "        \n",
    "    def write(self, content):\n",
    "        self.buff.seek(0, io.SEEK_END)\n",
    "        self.buff.write(content)\n",
    "        \n",
    "    def readlines(self):\n",
    "        self.buff.seek(self.read_pos)\n",
    "        for line in self.buff.readlines():\n",
    "            if line[-1] != b'\\n':\n",
    "                self.read_pos += len(line)\n",
    "                yield line[:-1]\n",
    "                \n",
    "    def reset(self):\n",
    "        self.read_pos = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad367ddf-96b6-40e1-938e-2a9aa0f03b0c",
   "metadata": {},
   "source": [
    "## Stream"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "84f9219f-fa4d-413e-b02d-2047142b4a79",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SageMaker支持张量并行技术，它可以将一个模型分为多个层，并将这些层分布在多个设备上同时运行。这可以帮助我们利用多个GPU进行分布式训练，并最大限度地提高训练速度和减少内存使用量。在使用张量并行技术时，我们可以选择要将张量并行应用于哪些层或哪些层的子集，以适应不同的需求。\n",
      "time:5.816547393798828 s\n"
     ]
    }
   ],
   "source": [
    "# prompts1 = \"\"\"你\"\"\"\n",
    "import time\n",
    "\n",
    "start = time.time()\n",
    "prompts1 = \"\"\"写一篇500字的科幻小说，背景关于宇宙战争\"\"\"\n",
    "# prompts1 = \"\"\"AWS Clean Rooms 的FAQ文档有提到 Q: 是否发起者和数据贡献者都会被收费？A: 是单方收费，只有查询的接收方会收费。\n",
    "# 请问AWS Clean Rooms是多方都会收费吗？\n",
    "# \"\"\"\n",
    "prompts1 = \"请根据以下三个反引号中的内容，回答用户的问题，如果三个反引号中的内容的包含markdown格式的内容，如参考图片，链接等，请尽可能利用并按markdown格式输出。如果三个反引号中的内容为空，则回答不知道，不要随意发挥和编造答案。 \\n``` \\n\\nSageMaker 分布式模型并行库概述\\nSageMaker 包括分布式模型并行库，有助于在多个计算节点上有效地分发和训练 DL 模型，克服了在单个 GPU 上训练模型的相关限制。此外，该库还允许您利用 EFA 支持的设备获得最佳分布式训练，从而通过低延迟、高吞吐量和操作系统旁路来提高节点间通信性能。\\nSageMaker 分布式模型并行库概述\\n由于像 GPT-J 这样的大型模型具有数十亿个参数，其 GPU 内存占用量超过了单个芯片，因此必须在多个 GPU 之间对这种模型进行分区。SageMaker 模型并行（SMP）库支持在多个 GPU 之间自动对模型进行分区。使用 SageMaker 模型并行功能，SageMaker 会代表您运行初始分析作业，以分析模型的计算和内存需求。然后，这些信息用于决定如何在 GPU 之间对模型进行分区，以最大限度地提高目标，如最大限度地提高速度或最大限度地减少内存占用量。\\nSageMaker 分布式模型并行库概述\\n这项功能还支持可选的管道运行调度，以最大限度地提高可用 GPU 的总体利用率。前向传递期间的激活传播和后向传递期间的梯度传播需要顺序计算，这限制了 GPU 的使用量。SageMaker 通过将小批次拆分为微批次，以便在不同的 GPU 上并行处理，从而利用管道运行调度克服了顺序计算限制。SageMaker 模型并行支持两种管道运行模式：\\n\\n简单管道 – 该模式下，每个微批次的前向传递结束后，才开始后向传递。\\n交错管道 – 该模式下，微批次的后向运行尽可能优先。这样可以更快地释放用于激活的内存，从而更有效地利用内存。\\n\\nSageMaker 分布式模型并行库概述 - 使用 SageMaker 模型并行库\\n请参阅以下代码：\\n\\n@smp.step\\ndef train_step(model, data, target):\\n    output = model(data)\\n    loss = F.nll_loss(output, target, reduction=\\\"mean\\\")\\n    model.backward(Loss)\\n    \\n    return output, loss\\n\\nwith smp.tensor_parallelism():\\n    model = AutoModelForCausalLM.from_config(model_config)\\n    \\nmodel = smp.DistributedModel (model)\\noptimizer = smp.DistributedOptimizer(optimizer)Python\\nSageMaker 分布式模型并行库概述 - 使用 SageMaker 模型并行库\\nSageMaker 模型并行库的张量并行为以下 Hugging Face 变换器模型提供了[开箱即用的支持](https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-extended-features-pytorch-hugging-face.html)：\\n\\n[GPT-2](https://github.com/aws/amazon-sagemaker-examples/tree/main/training/distributed_training/pytorch/model_parallel/gpt2)、[BERT](https://github.com/aws/amazon-sagemaker-examples/tree/main/training/distributed_training/pytorch/model_parallel/bert) 和 RoBERTa（在 SMP 库 v1.7.0 及更高版本中可用）\\n[GPT-J](https://github.com/aws/amazon-sagemaker-examples/tree/main/training/distributed_training/pytorch/model_parallel/gpt-j)（在 SMP 库 v1.8.0 及更高版本中可用）\\nGPT-Neo（在 SMP 库 v1.10.0 及更高版本中可用）\\nSageMaker 分布式模型并行库概述 - 使用 SageMaker 模型并行库\\n使用 SMP 库进行性能调整的最佳实践\\n在训练大型模型时，请考虑以下步骤，以便模型能以合理的批次大小在 GPU 内存中运行：\\n\\n为了提高性能，建议使用 GPU 内存更高、带宽互连更高的实例，如 p4d 和 p4de 实例。\\n在大多数情况下，可以启用[优化器状态分片](https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-extended-features-pytorch-optimizer-state-sharding.html)，当模型有多个副本（已启用数据并行性）时，优化器状态分片会很有用。可以通过在 modelparallel 配置中设置 \\\"shard_optimizer_state\\\": True 来开启优化器状态分片。\\n使用[激活检查点](https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-extended-features-pytorch-activation-checkpointing.html)，这是一种通过清除某些层的激活并在模型中选定模块的后向传递期间重新计算来减少内存使用量的技术。\\n使用[激活卸载](https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-extended-features-pytorch-activation-offloading.html)，这是一项可以进一步减少内存使用量的附加功能。要使用激活卸载，请在 modelparallel 配置中设置 \\\"offload_activations\\\": True。当激活检查点和管道并行功能处于开启状态且微批次数大于一时使用。\\n启用[张量并行](https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-extended-features-pytorch-tensor-parallelism.html)并增加并行度，其中并行度是 2 的幂。通常，出于性能考虑，张量并行仅限于一个节点内。\\n\\nSageMaker 分布式模型并行库概述 - 张量并行技术\\n各个层或 nn.Modules 利用张量并行技术划分到各个设备上，以便同时运行。下图是一个最简单的例子，说明该库如何将一个模型分为四层，以实现双向张量并行性（\\\"tensor_parallel_degree\\\": 2）。每个模型副本的层被一分为二（分成两半），并分布在两个 GPU 之间。本例中的数据并行度为八，因为模型并行配置还包括 \\\"pipeline_parallel_degree\\\": 1 和 \\\"ddp\\\": True。该库管理张量分布式模型各副本之间的通信。\\nSageMaker 分布式模型并行库概述 - 张量并行技术\\n![参考图片:张量并行技术](https://d2908q01vomqb2.cloudfront.net/f1f836cb4ea6efb2a0b1b99f41ad8b103eff4b59/2023/06/05/ML-8585-tensorp.jpg)\\nSageMaker 分布式模型并行库概述 - 张量并行技术\\n这一功能的好处是，您可以选择要将张量并行应用于哪些层或哪些层的子集。要深入了解 PyTorch 的张量并行性和其他节省内存的功能，以及如何设置管道和张量并行性的组合，请参阅 [PyTorch 的 SageMaker 模型并行库的扩展功能](https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-extended-features-pytorch.html)。\\n```\\n问:Sagemaker如何使用张量并行技术对模型分层\"\n",
    "response_model = smr_client.invoke_endpoint_with_response_stream(\n",
    "            EndpointName=endpoint_name,\n",
    "            Body=json.dumps(\n",
    "            {\n",
    "                \"inputs\": prompts1,\n",
    "                \"parameters\": parameters,\n",
    "                \"history\" : [],\n",
    "                \"stream\":True\n",
    "            }\n",
    "            ),\n",
    "            ContentType=\"application/json\",\n",
    "        )\n",
    "\n",
    "event_stream = response_model['Body']\n",
    "scanner = StreamScanner()\n",
    "for event in event_stream:\n",
    "    scanner.write(event['PayloadPart']['Bytes'])\n",
    "    for line in scanner.readlines():\n",
    "        try:\n",
    "            resp = json.loads(line)\n",
    "            # print(resp)\n",
    "            print(resp.get(\"outputs\")['outputs'], end='')\n",
    "        except Exception as e:\n",
    "            print(line)\n",
    "            continue\n",
    "print (f\"\\ntime:{time.time()-start} s\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "619db19b-0072-4d25-a4df-5d59c2f6947b",
   "metadata": {},
   "source": [
    "## None stream"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "d577e076-52b2-4257-a447-1d3a5813d7ce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "time:13.683655500411987 s\n",
      "这是一个未来的世界，人类文明已经拓展到整个宇宙。但是，随着人类的探索，他们发现了一个新的敌人——来自外太空的侵略者。\n",
      "\n",
      "这些外星人拥有强大的科技和军队，他们的目标是消灭人类文明，占领整个宇宙。人类的军队与他们进行了一场激烈的战斗，但是，他们的科技太过先进，人类的军队很快就败下阵来。\n",
      "\n",
      "在这个关键时刻，一个名叫李文的科学家提出了一个计划。他发现，这些外星人的科技是基于一个叫做“黑盒”的设备，只要摧毁这个黑盒，他们的科技就会失效。\n",
      "\n",
      "于是，李文组织了一支勇敢的队伍，他们决定去外太空寻找这个黑盒，并将其摧毁。他们乘坐着一艘先进的宇宙飞船，穿越了宇宙中的重重难关，最终找到了这个黑盒。\n",
      "\n",
      "在一场激烈的战斗中，他们成功地摧毁了这个黑盒，外星人的科技也因此失效。人类军队乘机反击，最终成功地打败了外星人，保护了人类的文明。\n",
      "\n",
      "从此以后，人类开始更加珍惜他们的科技和文明，也更加尊重和保护宇宙中的其他生物。\n"
     ]
    }
   ],
   "source": [
    "\n",
    "endpoint_name= 'qwen-14B-int4-2023-09-26-01-30-53-032-endpoint'\n",
    "prompts1 = \"\"\"AWS Clean Rooms 的FAQ文档有提到 Q: 是否发起者和数据贡献者都会被收费？A: 是单方收费，只有查询的接收方会收费。\n",
    "请问AWS Clean Rooms是多方都会收费吗？\n",
    "\"\"\"\n",
    "prompts1 = \"\"\"写一篇500字的科幻小说，背景关于宇宙战争\"\"\"\n",
    "start = time.time()\n",
    "response_model = smr_client.invoke_endpoint(\n",
    "            EndpointName=endpoint_name,\n",
    "            Body=json.dumps(\n",
    "            {\n",
    "                \"inputs\": prompts1,\n",
    "                \"parameters\": parameters,\n",
    "                \"history\" : [],\n",
    "            }\n",
    "            ),\n",
    "            ContentType=\"application/json\",\n",
    "        )\n",
    "\n",
    "resp = response_model['Body'].read()\n",
    "print (f\"\\ntime:{time.time()-start} s\")\n",
    "# print(resp.decode('utf8'))\n",
    "print(json.loads(resp)['outputs'])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21c8b703-e312-4964-8be9-a754468e07cd",
   "metadata": {},
   "source": [
    "#### 清除模型Endpoint和config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "f70d116f-4fb1-4f04-8732-3d6e4fb520de",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-endpoint --endpoint-name {endpoint_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "184e4d1d-3d62-43df-9b17-5d64ece928bd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-endpoint-config --endpoint-config-name {endpoint_config_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "707e8f09",
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws sagemaker delete-model --model-name {model_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0fa1499-0cee-4a02-b828-f3de1b24e875",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "availableInstances": [
   {
    "_defaultOrder": 0,
    "_isFastLaunch": true,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 4,
    "name": "ml.t3.medium",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 1,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.t3.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 2,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.t3.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 3,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.t3.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 4,
    "_isFastLaunch": true,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.m5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 5,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.m5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 6,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.m5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 7,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.m5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 8,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.m5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 9,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.m5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 10,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.m5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 11,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.m5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 12,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.m5d.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 13,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.m5d.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 14,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.m5d.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 15,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.m5d.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 16,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.m5d.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 17,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.m5d.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 18,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.m5d.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 19,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.m5d.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 20,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": true,
    "memoryGiB": 0,
    "name": "ml.geospatial.interactive",
    "supportedImageNames": [
     "sagemaker-geospatial-v1-0"
    ],
    "vcpuNum": 0
   },
   {
    "_defaultOrder": 21,
    "_isFastLaunch": true,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 4,
    "name": "ml.c5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 22,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.c5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 23,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.c5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 24,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.c5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 25,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 72,
    "name": "ml.c5.9xlarge",
    "vcpuNum": 36
   },
   {
    "_defaultOrder": 26,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 96,
    "name": "ml.c5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 27,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 144,
    "name": "ml.c5.18xlarge",
    "vcpuNum": 72
   },
   {
    "_defaultOrder": 28,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.c5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 29,
    "_isFastLaunch": true,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.g4dn.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 30,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.g4dn.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 31,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.g4dn.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 32,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.g4dn.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 33,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.g4dn.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 34,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.g4dn.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 35,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 61,
    "name": "ml.p3.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 36,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 244,
    "name": "ml.p3.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 37,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 488,
    "name": "ml.p3.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 38,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.p3dn.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 39,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.r5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 40,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.r5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 41,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.r5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 42,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.r5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 43,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.r5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 44,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.r5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 45,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 512,
    "name": "ml.r5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 46,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.r5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 47,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.g5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 48,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.g5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 49,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.g5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 50,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.g5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 51,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.g5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 52,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.g5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 53,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.g5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 54,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.g5.48xlarge",
    "vcpuNum": 192
   },
   {
    "_defaultOrder": 55,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 1152,
    "name": "ml.p4d.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 56,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 1152,
    "name": "ml.p4de.24xlarge",
    "vcpuNum": 96
   }
  ],
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
